home *** CD-ROM | disk | FTP | other *** search
/ SGI Developer Toolbox 6.1 / SGI Developer Toolbox 6.1 - Disc 4.iso / src / exampleCode / MP / timer / regular / linpackd.l < prev    next >
Encoding:
Text File  |  1994-08-02  |  70.8 KB  |  1,058 lines

  1.  
  2.  PFA/SGI                     10.0 k092805 910529            _MAIN              Source          10-Aug-1993  10:08:55      Page   1
  3.  
  4.  Footnotes     Actions        DO Loops    Line
  5.  
  6.                DIR                            1   # 1 "linpackd.f"
  7.                                               2   *
  8.                                               3   *PLEASE NOTE THAT netlib HAS MOVED, THE NEW ADDRESS IS netlib@ornl.gov.
  9.                                               4   *THE OLD ADDRESS, netlib@mcs.anl.gov, WILL BE TURNED OFF SOON.
  10.                                               5   *
  11.                                               6   *** from netlib, Fri Jul 27 14:07:10 EDT 1990 ***
  12.                                               7         double precision second
  13.                                               8         double precision aa(200,200),a(201,200),b(200),x(200)
  14.                                               9         double precision time(8,6),cray,ops,total,norma,normx
  15.                                              10         double precision resid,residn,eps,epslon
  16.                                              11         integer ipvt(200)
  17.                                              12         lda = 201
  18.                                              13         ldaa = 200
  19.                                              14   c
  20.                                              15         n = 100
  21.                                              16         cray = .056
  22.                                              17         write(6,1)
  23.                                              18       1 format(' Please send the results of this run to:'//
  24.                                              19        $       ' Jack J. Dongarra'/
  25.                                              20        $       ' Computer Science Department'/
  26.                                              21        $       ' University of Tennessee'/
  27.                                              22        $       ' Knoxville, Tennessee 37996-1300'//
  28.                                              23        $       ' Fax: 615-974-8296'//
  29.                                              24        $       ' Internet: dongarra@cs.utk.edu'/)
  30.                SO                            25         ops = (2.0d0*n**3)/3.0d0 + 2.0d0*n**2
  31.                                              26   c
  32.                                              27            call matgen(a,lda,n,b,norma)
  33.                                              28            t1 = second()
  34.                                              29            call dgefa(a,lda,n,ipvt,info)
  35.                                              30            time(1,1) = second() - t1
  36.                                              31            t1 = second()
  37.                                              32            call dgesl(a,lda,n,ipvt,b,0)
  38.                                              33            time(1,2) = second() - t1
  39.                                              34            total = time(1,1) + time(1,2)
  40.                                              35   c
  41.                                              36   c     compute a residual to verify results.
  42.                                              37   c
  43.   1  2         SO C           +---------     38            do 10 i = 1,n
  44.                               *              39               x(i) = b(i)
  45.                               *_________     40      10    continue
  46.                                              41            call matgen(a,lda,n,b,norma)
  47.   1  2         SO C           +---------     42            do 20 i = 1,n
  48.                SO             *              43               b(i) = -b(i)
  49.                               *_________     44      20    continue
  50.                                              45            call dmxpy(n,b,n,lda,x,a)
  51.                                              46            resid = 0.0
  52.                                              47            normx = 0.0
  53.   1  2         SO             +---------     48            do 30 i = 1,n
  54.   3            DD             !              49               resid = dmax1( resid, dabs(b(i)) )
  55.   4            DD             !              50               normx = dmax1( normx, dabs(x(i)) )
  56.                               !_________     51      30    continue
  57.  
  58.  PFA/SGI                     10.0 k092805 910529            _MAIN              Source          10-Aug-1993  10:08:55      Page   2
  59.  
  60.                                              52            eps = epslon(1.0d0)
  61.                                              53            residn = resid/( n*norma*normx*eps )
  62.                                              54            write(6,40)
  63.                                              55      40    format('     norm. resid      resid           machep',
  64.                                              56        $          '         x(1)          x(n)')
  65.                                              57            write(6,50) residn,resid,eps,x(1),x(n)
  66.                                              58      50    format(1p5e16.8)
  67.                                              59   c
  68.                                              60            write(6,60) n
  69.                                              61      60    format(//'    times are reported for matrices of order ',i5)
  70.                                              62            write(6,70)
  71.                                              63      70    format(6x,'dgefa',6x,'dgesl',6x,'total',5x,'mflops',7x,'unit',
  72.                                              64        $         6x,'ratio')
  73.                                              65   c
  74.                                              66            time(1,3) = total
  75.                                              67            time(1,4) = ops/(1.0d6*total)
  76.                                              68            time(1,5) = 2.0d0/time(1,4)
  77.                                              69            time(1,6) = total/cray
  78.                                              70            write(6,80) lda
  79.                                              71      80    format(' times for array with leading dimension of',i4)
  80.                                              72            write(6,110) (time(1,i),i=1,6)
  81.                                              73   c
  82.                                              74            call matgen(a,lda,n,b,norma)
  83.                                              75            t1 = second()
  84.                                              76            call dgefa(a,lda,n,ipvt,info)
  85.                                              77            time(2,1) = second() - t1
  86.                                              78            t1 = second()
  87.                                              79            call dgesl(a,lda,n,ipvt,b,0)
  88.                                              80            time(2,2) = second() - t1
  89.                                              81            total = time(2,1) + time(2,2)
  90.                                              82            time(2,3) = total
  91.                                              83            time(2,4) = ops/(1.0d6*total)
  92.                                              84            time(2,5) = 2.0d0/time(2,4)
  93.                                              85            time(2,6) = total/cray
  94.                                              86   c
  95.                                              87            call matgen(a,lda,n,b,norma)
  96.                                              88            t1 = second()
  97.                                              89            call dgefa(a,lda,n,ipvt,info)
  98.                                              90            time(3,1) = second() - t1
  99.                                              91            t1 = second()
  100.                                              92            call dgesl(a,lda,n,ipvt,b,0)
  101.                                              93            time(3,2) = second() - t1
  102.                                              94            total = time(3,1) + time(3,2)
  103.                                              95            time(3,3) = total
  104.                                              96            time(3,4) = ops/(1.0d6*total)
  105.                                              97            time(3,5) = 2.0d0/time(3,4)
  106.                                              98            time(3,6) = total/cray
  107.                                              99   c
  108.   5            SO                           100            ntimes = 10
  109.                                             101            tm2 = 0
  110.                                             102            t1 = second()
  111.                SO             +---------    103            do 90 i = 1,ntimes
  112.   6            NO NCS         !             104               tm = second()
  113.  
  114.  PFA/SGI                     10.0 k092805 910529            _MAIN              Source          10-Aug-1993  10:08:55      Page   3
  115.  
  116.   7  8  9 10
  117.   11 12        NO DD NCS      !             105               call matgen(a,lda,n,b,norma)
  118.   6 13         NO DD NCS      !             106               tm2 = tm2 + second() - tm
  119.   7  8  9 14
  120.   15 16        NO DD NCS      !             107               call dgefa(a,lda,n,ipvt,info)
  121.                               !_________    108      90    continue
  122.                SO                           109            time(4,1) = (second() - t1 - tm2)/ntimes
  123.                                             110            t1 = second()
  124.                SO             +---------    111            do 100 i = 1,ntimes
  125.   7  8  9 11
  126.   15 17        NO DD NCS      !             112               call dgesl(a,lda,n,ipvt,b,0)
  127.                               !_________    113     100    continue
  128.                SO                           114            time(4,2) = (second() - t1)/ntimes
  129.                                             115            total = time(4,1) + time(4,2)
  130.                                             116            time(4,3) = total
  131.                                             117            time(4,4) = ops/(1.0d6*total)
  132.                                             118            time(4,5) = 2.0d0/time(4,4)
  133.                                             119            time(4,6) = total/cray
  134.                                             120   c
  135.                                             121            write(6,110) (time(2,i),i=1,6)
  136.                                             122            write(6,110) (time(3,i),i=1,6)
  137.                                             123            write(6,110) (time(4,i),i=1,6)
  138.                                             124     110    format(6(1pe11.3))
  139.                                             125   c
  140.                                             126            call matgen(aa,ldaa,n,b,norma)
  141.                                             127            t1 = second()
  142.                                             128            call dgefa(aa,ldaa,n,ipvt,info)
  143.                                             129            time(5,1) = second() - t1
  144.                                             130            t1 = second()
  145.                                             131            call dgesl(aa,ldaa,n,ipvt,b,0)
  146.                                             132            time(5,2) = second() - t1
  147.                                             133            total = time(5,1) + time(5,2)
  148.                                             134            time(5,3) = total
  149.                                             135            time(5,4) = ops/(1.0d6*total)
  150.                                             136            time(5,5) = 2.0d0/time(5,4)
  151.                                             137            time(5,6) = total/cray
  152.                                             138   c
  153.                                             139            call matgen(aa,ldaa,n,b,norma)
  154.                                             140            t1 = second()
  155.                                             141            call dgefa(aa,ldaa,n,ipvt,info)
  156.                                             142            time(6,1) = second() - t1
  157.                                             143            t1 = second()
  158.                                             144            call dgesl(aa,ldaa,n,ipvt,b,0)
  159.                                             145            time(6,2) = second() - t1
  160.                                             146            total = time(6,1) + time(6,2)
  161.                                             147            time(6,3) = total
  162.                                             148            time(6,4) = ops/(1.0d6*total)
  163.                                             149            time(6,5) = 2.0d0/time(6,4)
  164.                                             150            time(6,6) = total/cray
  165.                                             151   c
  166.                                             152            call matgen(aa,ldaa,n,b,norma)
  167.                                             153            t1 = second()
  168.                                             154            call dgefa(aa,ldaa,n,ipvt,info)
  169.  
  170.  PFA/SGI                     10.0 k092805 910529            _MAIN              Source          10-Aug-1993  10:08:55      Page   4
  171.  
  172.                                             155            time(7,1) = second() - t1
  173.                                             156            t1 = second()
  174.                                             157            call dgesl(aa,ldaa,n,ipvt,b,0)
  175.                                             158            time(7,2) = second() - t1
  176.                                             159            total = time(7,1) + time(7,2)
  177.                                             160            time(7,3) = total
  178.                                             161            time(7,4) = ops/(1.0d6*total)
  179.                                             162            time(7,5) = 2.0d0/time(7,4)
  180.                                             163            time(7,6) = total/cray
  181.                                             164   c
  182.   5            SO                           165            ntimes = 10
  183.                                             166            tm2 = 0
  184.                                             167            t1 = second()
  185.                SO             +---------    168            do 120 i = 1,ntimes
  186.   6            NO NCS         !             169               tm = second()
  187.   7 10 11 12
  188.   18 19        NO DD NCS      !             170               call matgen(aa,ldaa,n,b,norma)
  189.   6 13         NO DD NCS      !             171               tm2 = tm2 + second() - tm
  190.   7 14 15 16
  191.   18 19        NO DD NCS      !             172               call dgefa(aa,ldaa,n,ipvt,info)
  192.                               !_________    173     120    continue
  193.                SO                           174            time(8,1) = (second() - t1 - tm2)/ntimes
  194.                                             175            t1 = second()
  195.                SO             +---------    176            do 130 i = 1,ntimes
  196.   7 11 15 17
  197.   18 19        NO DD NCS      !             177               call dgesl(aa,ldaa,n,ipvt,b,0)
  198.                               !_________    178     130    continue
  199.                SO                           179            time(8,2) = (second() - t1)/ntimes
  200.                                             180            total = time(8,1) + time(8,2)
  201.                                             181            time(8,3) = total
  202.                                             182            time(8,4) = ops/(1.0d6*total)
  203.                                             183            time(8,5) = 2.0d0/time(8,4)
  204.                                             184            time(8,6) = total/cray
  205.                                             185   c
  206.                                             186            write(6,140) ldaa
  207.                                             187     140    format(/' times for array with leading dimension of',i4)
  208.                                             188            write(6,110) (time(5,i),i=1,6)
  209.                                             189            write(6,110) (time(6,i),i=1,6)
  210.                                             190            write(6,110) (time(7,i),i=1,6)
  211.                                             191            write(6,110) (time(8,i),i=1,6)
  212.                                             192         stop
  213.                                             193         end
  214.  
  215.  
  216.  Abbreviations Used
  217.   NO       not optimized
  218.   DD       data dependence
  219.   SO       scalar optimization
  220.   DIR      directive
  221.   NCS      non-concurrent-stmt
  222.   C        concurrentized
  223.  
  224.  
  225.  Footnote List
  226.  
  227.  PFA/SGI                     10.0 k092805 910529            _MAIN              Source          10-Aug-1993  10:08:55      Page   5
  228.  
  229.    1: scalar optimization        Loop unrolled 4 times to improve scalar performance.
  230.    2: scalar optimization        Cleanup loop for loop unrolling.
  231.    3: data dependence            Data dependence involving this line due to variable "RESID".
  232.    4: data dependence            Data dependence involving this line due to variable "NORMX".
  233.    5: scalar optimization        Statement deleted because of scalar optimization.
  234.    6: not optimized              Unoptimizable call to "SECOND" found.
  235.    7: data dependence            Data dependence involving this line due to variable "N".
  236.    8: data dependence            Data dependence involving this line due to variable "LDA".
  237.    9: data dependence            Data dependence involving this line due to variable "A".
  238.   10: data dependence            Data dependence involving this line due to variable "NORMA".
  239.   11: data dependence            Data dependence involving this line due to variable "B".
  240.   12: not optimized              Unoptimizable call to "MATGEN" found.
  241.   13: data dependence            Data dependence involving this line due to variable "TM2".
  242.   14: data dependence            Data dependence involving this line due to variable "INFO".
  243.   15: data dependence            Data dependence involving this line due to variable "IPVT".
  244.   16: not optimized              Unoptimizable call to "DGEFA" found.
  245.   17: not optimized              Unoptimizable call to "DGESL" found.
  246.   18: data dependence            Data dependence involving this line due to variable "LDAA".
  247.   19: data dependence            Data dependence involving this line due to variable "AA".
  248.  
  249.  PFA/SGI                     10.0 k092805 910529            _MAIN           Loop Summary       10-Aug-1993  10:08:55      Page   6
  250.  
  251.  
  252.      Loop Summary
  253.  
  254.               From    To      Loop       Loop       at      Unroll   Unroll   Iteration
  255.       Loop#   line    line    label      index      nest    weight   factor   workload   Status
  256.       1       38      40      Do 10      I          1       3        4                   scalar mode preferable
  257.       2       38      40      Do 10      I          1       3        4        3          concurrentized
  258.       3       42      44      Do 20      I          1       4        4                   scalar mode preferable
  259.       4       42      44      Do 20      I          1       4        4        4          concurrentized
  260.       5       48      51      Do 30      I          1       12       4                   unrolled
  261.       6       48      51      Do 30      I          1       12       4                   unrolled
  262.       7       103     108     Do 90      I          1       204      1                   unoptimizable call (DGEFA)
  263.       8       111     113     Do 100     I          1       50       1                   unoptimizable call (DGESL)
  264.       9       168     173     Do 120     I          1       204      1                   unoptimizable call (DGEFA)
  265.       10      176     178     Do 130     I          1       50       1                   unoptimizable call (DGESL)
  266.  
  267.  PFA/SGI                     10.0 k092805 910529            MATGEN             Source          10-Aug-1993  10:08:55      Page   7
  268.  
  269.  Footnotes     Actions        DO Loops    Line
  270.  
  271.                                             194         subroutine matgen(a,lda,n,b,norma)
  272.                                             195         double precision a(lda,1),b(1),norma
  273.                                             196   c
  274.                                             197         init = 1325
  275.                                             198         norma = 0.0
  276.   1            OPT            +---------    199         do 30 j = 1,n
  277.   2  3         SO             *+--------    200            do 20 i = 1,n
  278.   4            DD             *!            201               init = mod(3125*init,65536)
  279.   4            DD             *!            202               a(i,j) = (init - 32768.0)/16384.0
  280.   5            DD             *!            203               norma = dmax1(dabs(a(i,j)), norma)
  281.                               *!________    204      20    continue
  282.                               *_________    205      30 continue
  283.   1            OPT            +---------    206         do 35 i = 1,n
  284.                               !             207             b(i) = 0.0
  285.                               !_________    208      35 continue
  286.   2  3         LR SO          +---------    209         do 50 j = 1,n
  287.                LR SO C        !+--------    210            do 40 i = 1,n
  288.   6            DD             !*            211               b(i) = b(i) + a(i,j)
  289.                               !*________    212      40    continue
  290.                               !_________    213      50 continue
  291.                                             214         return
  292.                                             215         end
  293.  
  294.  
  295.  Abbreviations Used
  296.   OPT      optimized
  297.   LR       loop reordering
  298.   DD       data dependence
  299.   SO       scalar optimization
  300.   C        concurrentized
  301.  
  302.  
  303.  Footnote List
  304.    1: optimized                  Loop has been fused with others to reduce overhead.
  305.    2: scalar optimization        Loop unrolled 4 times to improve scalar performance.
  306.    3: scalar optimization        Cleanup loop for loop unrolling.
  307.    4: data dependence            Data dependence involving this line due to variable "INIT".
  308.    5: data dependence            Data dependence involving this line due to variable "NORMA".
  309.    6: data dependence            Data dependence involving this line due to variable "B".
  310.  
  311.  PFA/SGI                     10.0 k092805 910529            MATGEN          Loop Summary       10-Aug-1993  10:08:55      Page   8
  312.  
  313.  
  314.      Loop Summary
  315.  
  316.               From    To      Loop       Loop       at      Unroll   Unroll   Iteration
  317.       Loop#   line    line    label      index      nest    weight   factor   workload   Status
  318.       1       199     208     Do 30      J          2       2        4                   scalar mode preferable
  319.       2       200     204     Do 20      I          3       16       4                   unrolled
  320.       3       200     204     Do 20      I          3       16       4                   unrolled
  321.       4       199     208     Do 30      J          2       2        4                   scalar mode preferable
  322.       5       199     208     Do 30      J          2       2        4        2          concurrentized
  323.       6       210     212     Do 40      I          1                         6          concurrentized
  324.       7       209     213     Do 50      J          2       4        4                   unrolled; already in a parallel loop
  325.       8       209     213     Do 50      J          2       4        4                   unrolled; already in a parallel loop
  326.       9       206     208     Do 35      I          1                                    unrolled completely or removed
  327.  
  328.  PFA/SGI                     10.0 k092805 910529            DGEFA              Source          10-Aug-1993  10:08:55      Page   9
  329.  
  330.  Footnotes     Actions        DO Loops    Line
  331.  
  332.                                             216         subroutine dgefa(a,lda,n,ipvt,info)
  333.                                             217         integer lda,n,ipvt(1),info
  334.                                             218         double precision a(lda,1)
  335.                                             219   c
  336.                                             220   c     dgefa factors a double precision matrix by gaussian elimination.
  337.                                             221   c
  338.                                             222   c     dgefa is usually called by dgeco, but it can be called
  339.                                             223   c     directly with a saving in time if  rcond  is not needed.
  340.                                             224   c     (time for dgeco) = (1 + 9/n)*(time for dgefa) .
  341.                                             225   c
  342.                                             226   c     on entry
  343.                                             227   c
  344.                                             228   c        a       double precision(lda, n)
  345.                                             229   c                the matrix to be factored.
  346.                                             230   c
  347.                                             231   c        lda     integer
  348.                                             232   c                the leading dimension of the array  a .
  349.                                             233   c
  350.                                             234   c        n       integer
  351.                                             235   c                the order of the matrix  a .
  352.                                             236   c
  353.                                             237   c     on return
  354.                                             238   c
  355.                                             239   c        a       an upper triangular matrix and the multipliers
  356.                                             240   c                which were used to obtain it.
  357.                                             241   c                the factorization can be written  a = l*u  where
  358.                                             242   c                l  is a product of permutation and unit lower
  359.                                             243   c                triangular matrices and  u  is upper triangular.
  360.                                             244   c
  361.                                             245   c        ipvt    integer(n)
  362.                                             246   c                an integer vector of pivot indices.
  363.                                             247   c
  364.                                             248   c        info    integer
  365.                                             249   c                = 0  normal value.
  366.                                             250   c                = k  if  u(k,k) .eq. 0.0 .  this is not an error
  367.                                             251   c                     condition for this subroutine, but it does
  368.                                             252   c                     indicate that dgesl or dgedi will divide by zero
  369.                                             253   c                     if called.  use  rcond  in dgeco for a reliable
  370.                                             254   c                     indication of singularity.
  371.                                             255   c
  372.                                             256   c     linpack. this version dated 08/14/78 .
  373.                                             257   c     cleve moler, university of new mexico, argonne national lab.
  374.                                             258   c
  375.                                             259   c     subroutines and functions
  376.                                             260   c
  377.                                             261   c     blas daxpy,dscal,idamax
  378.                                             262   c
  379.                                             263   c     internal variables
  380.                                             264   c
  381.                                             265         double precision t
  382.                                             266         integer idamax,j,k,kp1,l,nm1
  383.  
  384.  PFA/SGI                     10.0 k092805 910529            DGEFA              Source          10-Aug-1993  10:08:55      Page  10
  385.  
  386.                                             267   c
  387.                                             268   c
  388.                                             269   c     gaussian elimination with partial pivoting
  389.                                             270   c
  390.                                             271         info = 0
  391.   1            SO                           272         nm1 = n - 1
  392.                SO                           273         if (nm1 .lt. 1) go to 70
  393.   2            NO SO NCS      +---------    274         do 60 k = 1, nm1
  394.   1            SO             !             275            kp1 = k + 1
  395.                               !             276   c
  396.                               !             277   c        find l = pivot index
  397.                               !             278   c
  398.                               !             279            l = idamax(n-k+1,a(k,k),1) + k - 1
  399.                               !             280            ipvt(k) = l
  400.                               !             281   c
  401.                               !             282   c        zero pivot implies this column already triangularized
  402.                               !             283   c
  403.                               !             284            if (a(l,k) .eq. 0.0d0) go to 40
  404.                               !             285   c
  405.                               !             286   c           interchange if necessary
  406.                               !             287   c
  407.                SO             !             288               if (l .eq. k) go to 10
  408.                               !             289                  t = a(l,k)
  409.                               !             290                  a(l,k) = a(k,k)
  410.                               !             291                  a(k,k) = t
  411.                               !             292      10       continue
  412.                               !             293   c
  413.                               !             294   c           compute multipliers
  414.                               !             295   c
  415.                SO             !             296               t = -1.0d0/a(k,k)
  416.                               !             297               call dscal(n-k,t,a(k+1,k),1)
  417.                               !             298   c
  418.                               !             299   c           row elimination with column indexing
  419.                               !             300   c
  420.                SO NCS         !+--------    301               do 30 j = kp1, n
  421.                               !!            302                  t = a(l,j)
  422.                SO             !!            303                  if (l .eq. k) go to 20
  423.                               !!            304                     a(l,j) = a(k,j)
  424.                               !!            305                     a(k,j) = t
  425.                               !!            306      20          continue
  426.   3            NO NCS         !!            307                  call daxpy(n-k,t,a(k+1,k),1,a(k+1,j),1)
  427.   3            NO             !!________    308      30       continue
  428.                               !             309            go to 50
  429.                               !             310      40    continue
  430.                               !             311               info = k
  431.                               !             312      50    continue
  432.                               !_________    313      60 continue
  433.                                             314      70 continue
  434.                                             315         ipvt(n) = n
  435.                                             316         if (a(n,n) .eq. 0.0d0) info = n
  436.                                             317         return
  437.                                             318         end
  438.  
  439.  PFA/SGI                     10.0 k092805 910529            DGEFA              Source          10-Aug-1993  10:08:55      Page  11
  440.  
  441.  
  442.  
  443.  Abbreviations Used
  444.   NO       not optimized
  445.   SO       scalar optimization
  446.   NCS      non-concurrent-stmt
  447.  
  448.  
  449.  Footnote List
  450.    1: scalar optimization        Statement deleted because of scalar optimization.
  451.    2: not optimized              No optimizable statements found.
  452.    3: not optimized              Unoptimizable call to "DAXPY" found.
  453.  
  454.  PFA/SGI                     10.0 k092805 910529            DGEFA           Loop Summary       10-Aug-1993  10:08:55      Page  12
  455.  
  456.  
  457.      Loop Summary
  458.  
  459.               From    To      Loop       Loop       at      Unroll   Unroll   Iteration
  460.       Loop#   line    line    label      index      nest    weight   factor   workload   Status
  461.       1       274     313     Do 60      K          1                                    no optimizable statements
  462.       2       301     308     Do 30      J          2       53       1                   unrolled
  463.       3       303     303     Do 30      J          2       53       1                   unrolled
  464.  
  465.  PFA/SGI                     10.0 k092805 910529            DGESL              Source          10-Aug-1993  10:08:55      Page  13
  466.  
  467.  Footnotes     Actions        DO Loops    Line
  468.  
  469.                                             319         subroutine dgesl(a,lda,n,ipvt,b,job)
  470.                                             320         integer lda,n,ipvt(1),job
  471.                                             321         double precision a(lda,1),b(1)
  472.                                             322   c
  473.                                             323   c     dgesl solves the double precision system
  474.                                             324   c     a * x = b  or  trans(a) * x = b
  475.                                             325   c     using the factors computed by dgeco or dgefa.
  476.                                             326   c
  477.                                             327   c     on entry
  478.                                             328   c
  479.                                             329   c        a       double precision(lda, n)
  480.                                             330   c                the output from dgeco or dgefa.
  481.                                             331   c
  482.                                             332   c        lda     integer
  483.                                             333   c                the leading dimension of the array  a .
  484.                                             334   c
  485.                                             335   c        n       integer
  486.                                             336   c                the order of the matrix  a .
  487.                                             337   c
  488.                                             338   c        ipvt    integer(n)
  489.                                             339   c                the pivot vector from dgeco or dgefa.
  490.                                             340   c
  491.                                             341   c        b       double precision(n)
  492.                                             342   c                the right hand side vector.
  493.                                             343   c
  494.                                             344   c        job     integer
  495.                                             345   c                = 0         to solve  a*x = b ,
  496.                                             346   c                = nonzero   to solve  trans(a)*x = b  where
  497.                                             347   c                            trans(a)  is the transpose.
  498.                                             348   c
  499.                                             349   c     on return
  500.                                             350   c
  501.                                             351   c        b       the solution vector  x .
  502.                                             352   c
  503.                                             353   c     error condition
  504.                                             354   c
  505.                                             355   c        a division by zero will occur if the input factor contains a
  506.                                             356   c        zero on the diagonal.  technically this indicates singularity
  507.                                             357   c        but it is often caused by improper arguments or improper
  508.                                             358   c        setting of lda .  it will not occur if the subroutines are
  509.                                             359   c        called correctly and if dgeco has set rcond .gt. 0.0
  510.                                             360   c        or dgefa has set info .eq. 0 .
  511.                                             361   c
  512.                                             362   c     to compute  inverse(a) * c  where  c  is a matrix
  513.                                             363   c     with  p  columns
  514.                                             364   c           call dgeco(a,lda,n,ipvt,rcond,z)
  515.                                             365   c           if (rcond is too small) go to ...
  516.                                             366   c           do 10 j = 1, p
  517.                                             367   c              call dgesl(a,lda,n,ipvt,c(1,j),0)
  518.                                             368   c        10 continue
  519.                                             369   c
  520.  
  521.  PFA/SGI                     10.0 k092805 910529            DGESL              Source          10-Aug-1993  10:08:55      Page  14
  522.  
  523.                                             370   c     linpack. this version dated 08/14/78 .
  524.                                             371   c     cleve moler, university of new mexico, argonne national lab.
  525.                                             372   c
  526.                                             373   c     subroutines and functions
  527.                                             374   c
  528.                                             375   c     blas daxpy,ddot
  529.                                             376   c
  530.                                             377   c     internal variables
  531.                                             378   c
  532.                                             379         double precision ddot,t
  533.                                             380         integer k,kb,l,nm1
  534.                                             381   c
  535.   1            SO                           382         nm1 = n - 1
  536.                SO                           383         if (job .ne. 0) go to 50
  537.                                             384   c
  538.                                             385   c        job = 0 , solve  a * x = b
  539.                                             386   c        first solve  l*y = b
  540.                                             387   c
  541.                SO                           388            if (nm1 .lt. 1) go to 30
  542.                SO NCS         +---------    389            do 20 k = 1, nm1
  543.   1            SO             !             390               l = ipvt(k)
  544.                SO             !             391               t = b(l)
  545.                SO             !             392               if (l .eq. k) go to 10
  546.                SO             !             393                  b(l) = b(k)
  547.                               !             394                  b(k) = t
  548.                               !             395      10       continue
  549.   2            NO NCS         !             396               call daxpy(n-k,t,a(k+1,k),1,b(k+1),1)
  550.   2            NO             !_________    397      20    continue
  551.                                             398      30    continue
  552.                                             399   c
  553.                                             400   c        now solve  u*x = y
  554.                                             401   c
  555.                NCS            +---------    402            do 40 kb = 1, n
  556.                               !             403               k = n + 1 - kb
  557.                               !             404               b(k) = b(k)/a(k,k)
  558.                SO             !             405               t = -b(k)
  559.   2            NO NCS         !             406               call daxpy(k-1,t,a(1,k),1,b(1),1)
  560.   2            NO             !_________    407      40    continue
  561.                                             408         go to 100
  562.                                             409      50 continue
  563.                                             410   c
  564.                                             411   c        job = nonzero, solve  trans(a) * x = b
  565.                                             412   c        first solve  trans(u)*y = b
  566.                                             413   c
  567.                NCS            +---------    414            do 60 k = 1, n
  568.   3            NO NCS         !             415               t = ddot(k-1,a(1,k),1,b(1),1)
  569.                               !             416               b(k) = (b(k) - t)/a(k,k)
  570.   3            NO             !_________    417      60    continue
  571.                                             418   c
  572.                                             419   c        now solve trans(l)*x = y
  573.                                             420   c
  574.                SO                           421            if (nm1 .lt. 1) go to 90
  575.                SO NCS         +---------    422            do 80 kb = 1, nm1
  576.  
  577.  PFA/SGI                     10.0 k092805 910529            DGESL              Source          10-Aug-1993  10:08:55      Page  15
  578.  
  579.   1            SO             !             423               k = n - kb
  580.   3            NO SO NCS      !             424               b(k) = b(k) + ddot(n-k,a(k+1,k),1,b(k+1),1)
  581.   1            SO             !             425               l = ipvt(k)
  582.                SO             !             426               if (l .eq. k) go to 70
  583.                SO             !             427                  t = b(l)
  584.                SO             !             428                  b(l) = b(k)
  585.                SO             !             429                  b(k) = t
  586.                               !             430      70       continue
  587.   3            NO             !_________    431      80    continue
  588.                                             432      90    continue
  589.                                             433     100 continue
  590.                                             434         return
  591.                                             435         end
  592.  
  593.  
  594.  Abbreviations Used
  595.   NO       not optimized
  596.   SO       scalar optimization
  597.   NCS      non-concurrent-stmt
  598.  
  599.  
  600.  Footnote List
  601.    1: scalar optimization        Statement deleted because of scalar optimization.
  602.    2: not optimized              Unoptimizable call to "DAXPY" found.
  603.    3: not optimized              Unoptimizable call to "DDOT" found.
  604.  
  605.  PFA/SGI                     10.0 k092805 910529            DGESL           Loop Summary       10-Aug-1993  10:08:55      Page  16
  606.  
  607.  
  608.      Loop Summary
  609.  
  610.               From    To      Loop       Loop       at      Unroll   Unroll   Iteration
  611.       Loop#   line    line    label      index      nest    weight   factor   workload   Status
  612.       1       389     397     Do 20      K          1       62       1                   unoptimizable call (DAXPY)
  613.       2       402     407     Do 40      KB         1       62       1                   unoptimizable call (DAXPY)
  614.       3       414     417     Do 60      K          1       58       1                   unoptimizable call (DDOT)
  615.       4       422     431     Do 80      KB         1       74       1                   unoptimizable call (DDOT)
  616.  
  617.  PFA/SGI                     10.0 k092805 910529            DAXPY              Source          10-Aug-1993  10:08:55      Page  17
  618.  
  619.  Footnotes     Actions        DO Loops    Line
  620.  
  621.                                             436         subroutine daxpy(n,da,dx,incx,dy,incy)
  622.                                             437   c
  623.                                             438   c     constant times a vector plus a vector.
  624.                                             439   c     jack dongarra, linpack, 3/11/78.
  625.                                             440   c
  626.                                             441         double precision dx(1),dy(1),da
  627.                                             442         integer i,incx,incy,ix,iy,m,mp1,n
  628.                                             443   c
  629.                SO                           444         if(n.le.0)return
  630.                                             445         if (da .eq. 0.0d0) return
  631.                SO                           446         if(incx.eq.1.and.incy.eq.1)go to 20
  632.                                             447   c
  633.                                             448   c        code for unequal increments or equal increments
  634.                                             449   c          not equal to 1
  635.                                             450   c
  636.                                             451         ix = 1
  637.                                             452         iy = 1
  638.                SO                           453         if(incx.lt.0)ix = (-n+1)*incx + 1
  639.                SO                           454         if(incy.lt.0)iy = (-n+1)*incy + 1
  640.   1  2  3      Q  SO C        +---------    455         do 10 i = 1,n
  641.   4            DD             *             456           dy(iy) = dy(iy) + da*dx(ix)
  642.                               *             457           ix = ix + incx
  643.                               *             458           iy = iy + incy
  644.                               *_________    459      10 continue
  645.                                             460         return
  646.                                             461   c
  647.                                             462   c        code for both increments equal to 1
  648.                                             463   c
  649.                                             464      20 continue
  650.   1  2         SO C           +---------    465         do 30 i = 1,n
  651.                               *             466           dy(i) = dy(i) + da*dx(i)
  652.                               *_________    467      30 continue
  653.                                             468         return
  654.                                             469         end
  655.  
  656.  
  657.  Abbreviations Used
  658.   DD       data dependence
  659.   Q        question
  660.   SO       scalar optimization
  661.   C        concurrentized
  662.  
  663.  
  664.  Footnote List
  665.    1: scalar optimization        Loop unrolled 4 times to improve scalar performance.
  666.    2: scalar optimization        Cleanup loop for loop unrolling.
  667.    3: question                   Is "INCY .EQ. 0" in the loop beginning at this statement?
  668.    4: data dependence            Data dependence involving this line due to variable "DY".
  669.  
  670.  PFA/SGI                     10.0 k092805 910529            DAXPY           Loop Summary       10-Aug-1993  10:08:55      Page  18
  671.  
  672.  
  673.      Loop Summary
  674.  
  675.               From    To      Loop       Loop       at      Unroll   Unroll   Iteration
  676.       Loop#   line    line    label      index      nest    weight   factor   workload   Status
  677.       1       455     459     Do 10      I          1       11       4                   scalar mode preferable
  678.       2       455     459     Do 10      I          1       11       4                   scalar mode preferable
  679.       3       455     459     Do 10      I          1       11       4                   scalar mode preferable
  680.       4       455     459     Do 10      I          1       11       4        20         concurrentized
  681.       5       465     467     Do 30      I          1       6        4                   scalar mode preferable
  682.       6       465     467     Do 30      I          1       6        4        6          concurrentized
  683.  
  684.  PFA/SGI                     10.0 k092805 910529            DDOT               Source          10-Aug-1993  10:08:55      Page  19
  685.  
  686.  Footnotes     Actions        DO Loops    Line
  687.  
  688.                                             470         double precision function ddot(n,dx,incx,dy,incy)
  689.                                             471   c
  690.                                             472   c     forms the dot product of two vectors.
  691.                                             473   c     jack dongarra, linpack, 3/11/78.
  692.                                             474   c
  693.                                             475         double precision dx(1),dy(1),dtemp
  694.                                             476         integer i,incx,incy,ix,iy,m,mp1,n
  695.                                             477   c
  696.                                             478         ddot = 0.0d0
  697.                                             479         dtemp = 0.0d0
  698.                SO                           480         if(n.le.0)return
  699.                SO                           481         if(incx.eq.1.and.incy.eq.1)go to 20
  700.                                             482   c
  701.                                             483   c        code for unequal increments or equal increments
  702.                                             484   c          not equal to 1
  703.                                             485   c
  704.                                             486         ix = 1
  705.                                             487         iy = 1
  706.                SO                           488         if(incx.lt.0)ix = (-n+1)*incx + 1
  707.                SO                           489         if(incy.lt.0)iy = (-n+1)*incy + 1
  708.   1  2         SO             +---------    490         do 10 i = 1,n
  709.   3            DD             !             491           dtemp = dtemp + dx(ix)*dy(iy)
  710.                               !             492           ix = ix + incx
  711.                               !             493           iy = iy + incy
  712.                               !_________    494      10 continue
  713.                                             495         ddot = dtemp
  714.                                             496         return
  715.                                             497   c
  716.                                             498   c        code for both increments equal to 1
  717.                                             499   c
  718.                                             500      20 continue
  719.   1  2         SO             +---------    501         do 30 i = 1,n
  720.   3            DD             !             502           dtemp = dtemp + dx(i)*dy(i)
  721.                               !_________    503      30 continue
  722.                                             504         ddot = dtemp
  723.                                             505         return
  724.                                             506         end
  725.  
  726.  
  727.  Abbreviations Used
  728.   DD       data dependence
  729.   SO       scalar optimization
  730.  
  731.  
  732.  Footnote List
  733.    1: scalar optimization        Loop unrolled 4 times to improve scalar performance.
  734.    2: scalar optimization        Cleanup loop for loop unrolling.
  735.    3: data dependence            Data dependence involving this line due to variable "DTEMP".
  736.  
  737.  PFA/SGI                     10.0 k092805 910529            DDOT            Loop Summary       10-Aug-1993  10:08:55      Page  20
  738.  
  739.  
  740.      Loop Summary
  741.  
  742.               From    To      Loop       Loop       at      Unroll   Unroll   Iteration
  743.       Loop#   line    line    label      index      nest    weight   factor   workload   Status
  744.       1       490     494     Do 10      I          1       9        4                   unrolled
  745.       2       490     494     Do 10      I          1       9        4                   unrolled
  746.       3       501     503     Do 30      I          1       5        4                   unrolled
  747.       4       501     503     Do 30      I          1       5        4                   unrolled
  748.  
  749.  PFA/SGI                     10.0 k092805 910529            DSCAL              Source          10-Aug-1993  10:08:55      Page  21
  750.  
  751.  Footnotes     Actions        DO Loops    Line
  752.  
  753.                                             507         subroutine  dscal(n,da,dx,incx)
  754.                                             508   c
  755.                                             509   c     scales a vector by a constant.
  756.                                             510   c     jack dongarra, linpack, 3/11/78.
  757.                                             511   c
  758.                                             512         double precision da,dx(1)
  759.                                             513         integer i,incx,m,mp1,n,nincx
  760.                                             514   c
  761.                SO                           515         if(n.le.0)return
  762.                SO                           516         if(incx.eq.1)go to 20
  763.                                             517   c
  764.                                             518   c        code for increment not equal to 1
  765.                                             519   c
  766.   1            SO                           520         nincx = n*incx
  767.                SO C           +---------    521         do 10 i = 1,nincx,incx
  768.                               *             522           dx(i) = da*dx(i)
  769.                               *_________    523      10 continue
  770.                                             524         return
  771.                                             525   c
  772.                                             526   c        code for increment equal to 1
  773.                                             527   c
  774.                                             528      20 continue
  775.   2  3         SO C           +---------    529         do 30 i = 1,n
  776.                               *             530           dx(i) = da*dx(i)
  777.                               *_________    531      30 continue
  778.                                             532         return
  779.                                             533         end
  780.  
  781.  
  782.  Abbreviations Used
  783.   SO       scalar optimization
  784.   C        concurrentized
  785.  
  786.  
  787.  Footnote List
  788.    1: scalar optimization        Statement deleted because of scalar optimization.
  789.    2: scalar optimization        Loop unrolled 4 times to improve scalar performance.
  790.    3: scalar optimization        Cleanup loop for loop unrolling.
  791.  
  792.  PFA/SGI                     10.0 k092805 910529            DSCAL           Loop Summary       10-Aug-1993  10:08:55      Page  22
  793.  
  794.  
  795.      Loop Summary
  796.  
  797.               From    To      Loop       Loop       at      Unroll   Unroll   Iteration
  798.       Loop#   line    line    label      index      nest    weight   factor   workload   Status
  799.       1       521     523     Do 10      I          1       4        4        4          concurrentized
  800.       2       529     531     Do 30      I          1       4        4                   scalar mode preferable
  801.       3       529     531     Do 30      I          1       4        4        4          concurrentized
  802.  
  803.  PFA/SGI                     10.0 k092805 910529            IDAMAX             Source          10-Aug-1993  10:08:55      Page  23
  804.  
  805.  Footnotes     Actions        DO Loops    Line
  806.  
  807.                                             534         integer function idamax(n,dx,incx)
  808.                                             535   c
  809.                                             536   c     finds the index of element having max. absolute value.
  810.                                             537   c     jack dongarra, linpack, 3/11/78.
  811.                                             538   c
  812.                                             539         double precision dx(1),dmax
  813.                                             540         integer i,incx,ix,n
  814.                                             541   c
  815.                                             542         idamax = 0
  816.                SO                           543         if( n .lt. 1 ) return
  817.                                             544         idamax = 1
  818.                SO                           545         if(n.eq.1)return
  819.                SO                           546         if(incx.eq.1)go to 20
  820.                                             547   c
  821.                                             548   c        code for increment not equal to 1
  822.                                             549   c
  823.   1            SO                           550         ix = 1
  824.                                             551         dmax = dabs(dx(1))
  825.                SO                           552         ix = ix + incx
  826.   2  3         SO             +---------    553         do 10 i = 2,n
  827.   4            DD SO          !             554            if(dabs(dx(ix)).le.dmax) go to 5
  828.   5            DD             !             555            idamax = i
  829.   4            DD SO          !             556            dmax = dabs(dx(ix))
  830.                SO             !             557       5    ix = ix + incx
  831.                               !_________    558      10 continue
  832.                                             559         return
  833.                                             560   c
  834.                                             561   c        code for increment equal to 1
  835.                                             562   c
  836.                                             563      20 dmax = dabs(dx(1))
  837.   2  3         SO             +---------    564         do 30 i = 2,n
  838.   4            DD             !             565            if(dabs(dx(i)).le.dmax) go to 30
  839.   5            DD             !             566            idamax = i
  840.   4            DD             !             567            dmax = dabs(dx(i))
  841.                               !_________    568      30 continue
  842.                                             569         return
  843.                                             570         end
  844.  
  845.  
  846.  Abbreviations Used
  847.   DD       data dependence
  848.   SO       scalar optimization
  849.  
  850.  
  851.  Footnote List
  852.    1: scalar optimization        Statement deleted because of scalar optimization.
  853.    2: scalar optimization        Loop unrolled 4 times to improve scalar performance.
  854.    3: scalar optimization        Cleanup loop for loop unrolling.
  855.    4: data dependence            Data dependence involving this line due to variable "DMAX".
  856.    5: data dependence            Data dependence involving this line due to variable "IDAMAX".
  857.  
  858.  PFA/SGI                     10.0 k092805 910529            IDAMAX          Loop Summary       10-Aug-1993  10:08:55      Page  24
  859.  
  860.  
  861.      Loop Summary
  862.  
  863.               From    To      Loop       Loop       at      Unroll   Unroll   Iteration
  864.       Loop#   line    line    label      index      nest    weight   factor   workload   Status
  865.       1       553     558     Do 10      I          1       12       4                   unrolled
  866.       2       553     558     Do 10      I          1       12       4                   unrolled
  867.       3       564     568     Do 30      I          1       10       4                   unrolled
  868.       4       564     568     Do 30      I          1       10       4                   unrolled
  869.  
  870.  PFA/SGI                     10.0 k092805 910529            EPSLON             Source          10-Aug-1993  10:08:55      Page  25
  871.  
  872.  Footnotes     Actions        DO Loops    Line
  873.  
  874.                                             571         double precision function epslon (x)
  875.                                             572         double precision x
  876.                                             573   c
  877.                                             574   c     estimate unit roundoff in quantities of size x.
  878.                                             575   c
  879.                                             576         double precision a,b,c,eps
  880.                                             577   c
  881.                                             578   c     this program should function properly on all systems
  882.                                             579   c     satisfying the following two assumptions,
  883.                                             580   c        1.  the base used in representing floating point
  884.                                             581   c            numbers is not a power of three.
  885.                                             582   c        2.  the quantity  a  in statement 10 is represented to
  886.                                             583   c            the accuracy used in floating point variables
  887.                                             584   c            that are stored in memory.
  888.                                             585   c     the statement number 10 and the go to 10 are intended to
  889.                                             586   c     force optimizing compilers to generate code satisfying
  890.                                             587   c     assumption 2.
  891.                                             588   c     under these assumptions, it should be true that,
  892.                                             589   c            a  is not exactly equal to four-thirds,
  893.                                             590   c            b  has a zero for its last bit or digit,
  894.                                             591   c            c  is not exactly equal to one,
  895.                                             592   c            eps  measures the separation of 1.0 from
  896.                                             593   c                 the next larger floating point number.
  897.                                             594   c     the developers of eispack would appreciate being informed
  898.                                             595   c     about any systems where these assumptions do not hold.
  899.                                             596   c
  900.                                             597   c     *****************************************************************
  901.                                             598   c     this routine is one of the auxiliary routines used by eispack iii
  902.                                             599   c     to avoid machine dependencies.
  903.                                             600   c     *****************************************************************
  904.                                             601   c
  905.                                             602   c     this version dated 4/6/83.
  906.                                             603   c
  907.                                             604         a = 4.0d0/3.0d0
  908.                                             605      10 b = a - 1.0d0
  909.                                             606         c = b + b + b
  910.                                             607         eps = dabs(c-1.0d0)
  911.                                             608         if (eps .eq. 0.0d0) go to 10
  912.                                             609         epslon = eps*dabs(x)
  913.                                             610         return
  914.                                             611         end
  915.  
  916.  PFA/SGI                     10.0 k092805 910529            EPSLON          Loop Summary       10-Aug-1993  10:08:55      Page  26
  917.  
  918.  
  919.      Loop Summary
  920.  
  921.               From    To      Loop       Loop       at      Unroll   Unroll   Iteration
  922.       Loop#   line    line    label      index      nest    weight   factor   workload   Status
  923.       1       605     608     Do                    1                                    optimization disabled
  924.  
  925.  PFA/SGI                     10.0 k092805 910529            DMXPY              Source          10-Aug-1993  10:08:55      Page  27
  926.  
  927.  Footnotes     Actions        DO Loops    Line
  928.  
  929.                                             612         subroutine dmxpy (n1, y, n2, ldm, x, m)
  930.                                             613         double precision y(*), x(*), m(ldm,*)
  931.                                             614   c
  932.                                             615   c   purpose:
  933.                                             616   c     multiply matrix m times vector x and add the result to vector y.
  934.                                             617   c
  935.                                             618   c   parameters:
  936.                                             619   c
  937.                                             620   c     n1 integer, number of elements in vector y, and number of rows in
  938.                                             621   c         matrix m
  939.                                             622   c
  940.                                             623   c     y double precision(n1), vector of length n1 to which is added
  941.                                             624   c         the product m*x
  942.                                             625   c
  943.                                             626   c     n2 integer, number of elements in vector x, and number of columns
  944.                                             627   c         in matrix m
  945.                                             628   c
  946.                                             629   c     ldm integer, leading dimension of array m
  947.                                             630   c
  948.                                             631   c     x double precision(n2), vector of length n2
  949.                                             632   c
  950.                                             633   c     m double precision(ldm,n2), matrix of n1 rows and n2 columns
  951.                                             634   c
  952.                                             635   c ----------------------------------------------------------------------
  953.                                             636   c
  954.                                             637   c   cleanup odd vector
  955.                                             638   c
  956.   1            SO                           639         j = mod(n2,2)
  957.                SO                           640         if (j .ge. 1) then
  958.   2  3         SO C           +---------    641            do 10 i = 1, n1
  959.                SO             *             642               y(i) = (y(i)) + x(j)*m(i,j)
  960.                               *_________    643      10    continue
  961.                                             644         endif
  962.                                             645   c
  963.                                             646   c   cleanup odd group of two vectors
  964.                                             647   c
  965.   1            SO                           648         j = mod(n2,4)
  966.                SO                           649         if (j .ge. 2) then
  967.   2  3         SO C           +---------    650            do 20 i = 1, n1
  968.                SO             *             651               y(i) = ( (y(i))
  969.                               *             652        $             + x(j-1)*m(i,j-1)) + x(j)*m(i,j)
  970.                               *_________    653      20    continue
  971.                                             654         endif
  972.                                             655   c
  973.                                             656   c   cleanup odd group of four vectors
  974.                                             657   c
  975.   1            SO                           658         j = mod(n2,8)
  976.                SO                           659         if (j .ge. 4) then
  977.   2  3         SO C           +---------    660            do 30 i = 1, n1
  978.                SO             *             661               y(i) = ((( (y(i))
  979.                               *             662        $             + x(j-3)*m(i,j-3)) + x(j-2)*m(i,j-2))
  980.  
  981.  PFA/SGI                     10.0 k092805 910529            DMXPY              Source          10-Aug-1993  10:08:55      Page  28
  982.  
  983.                               *             663        $             + x(j-1)*m(i,j-1)) + x(j)  *m(i,j)
  984.                               *_________    664      30    continue
  985.                                             665         endif
  986.                                             666   c
  987.                                             667   c   cleanup odd group of eight vectors
  988.                                             668   c
  989.   1            SO                           669         j = mod(n2,16)
  990.                SO                           670         if (j .ge. 8) then
  991.   3  4         SO C           +---------    671            do 40 i = 1, n1
  992.                SO             *             672               y(i) = ((((((( (y(i))
  993.                               *             673        $             + x(j-7)*m(i,j-7)) + x(j-6)*m(i,j-6))
  994.                               *             674        $             + x(j-5)*m(i,j-5)) + x(j-4)*m(i,j-4))
  995.                               *             675        $             + x(j-3)*m(i,j-3)) + x(j-2)*m(i,j-2))
  996.                               *             676        $             + x(j-1)*m(i,j-1)) + x(j)  *m(i,j)
  997.                               *_________    677      40    continue
  998.                                             678         endif
  999.                                             679   c
  1000.                                             680   c   main loop - groups of sixteen vectors
  1001.                                             681   c
  1002.                SO                           682         jmin = j+16
  1003.                LR             +---------    683         do 60 j = jmin, n2, 16
  1004.                LR C           !+--------    684            do 50 i = 1, n1
  1005.   5            DD             !*            685               y(i) = ((((((((((((((( (y(i))
  1006.                               !*            686        $             + x(j-15)*m(i,j-15)) + x(j-14)*m(i,j-14))
  1007.                               !*            687        $             + x(j-13)*m(i,j-13)) + x(j-12)*m(i,j-12))
  1008.                               !*            688        $             + x(j-11)*m(i,j-11)) + x(j-10)*m(i,j-10))
  1009.                               !*            689        $             + x(j- 9)*m(i,j- 9)) + x(j- 8)*m(i,j- 8))
  1010.                               !*            690        $             + x(j- 7)*m(i,j- 7)) + x(j- 6)*m(i,j- 6))
  1011.                               !*            691        $             + x(j- 5)*m(i,j- 5)) + x(j- 4)*m(i,j- 4))
  1012.                               !*            692        $             + x(j- 3)*m(i,j- 3)) + x(j- 2)*m(i,j- 2))
  1013.                               !*            693        $             + x(j- 1)*m(i,j- 1)) + x(j)   *m(i,j)
  1014.                               !*________    694      50    continue
  1015.                               !_________    695      60 continue
  1016.                                             696         return
  1017.                                             697         end
  1018.  
  1019.  
  1020.  Abbreviations Used
  1021.   LR       loop reordering
  1022.   DD       data dependence
  1023.   SO       scalar optimization
  1024.   C        concurrentized
  1025.  
  1026.  
  1027.  Footnote List
  1028.    1: scalar optimization        Statement deleted because of scalar optimization.
  1029.    2: scalar optimization        Loop unrolled 4 times to improve scalar performance.
  1030.    3: scalar optimization        Cleanup loop for loop unrolling.
  1031.    4: scalar optimization        Loop unrolled 2 times to improve scalar performance.
  1032.    5: data dependence            Data dependence involving this line due to variable "Y".
  1033.  
  1034.  PFA/SGI                     10.0 k092805 910529            DMXPY           Loop Summary       10-Aug-1993  10:08:55      Page  29
  1035.  
  1036.  
  1037.      Loop Summary
  1038.  
  1039.               From    To      Loop       Loop       at      Unroll   Unroll   Iteration
  1040.       Loop#   line    line    label      index      nest    weight   factor   workload   Status
  1041.       1       641     643     Do 10      I          1       7        4                   scalar mode preferable
  1042.       2       641     643     Do 10      I          1       7        4        12         concurrentized
  1043.       3       650     653     Do 20      I          1       12       4                   scalar mode preferable
  1044.       4       650     653     Do 20      I          1       12       4        23         concurrentized
  1045.       5       660     664     Do 30      I          1       22       4                   scalar mode preferable
  1046.       6       660     664     Do 30      I          1       22       4        45         concurrentized
  1047.       7       671     677     Do 40      I          1       42       2                   scalar mode preferable
  1048.       8       671     677     Do 40      I          1       42       2        89         concurrentized
  1049.       9       684     694     Do 50      I          1                                    concurrentized
  1050.       10      683     695     Do 60      J          2       111      1                   already in a parallel loop
  1051.  
  1052.  PFA/SGI                     10.0 k092805 910529            _MAIN              Source          10-Aug-1993  10:08:55      Page  30
  1053.  
  1054.  Footnotes     Actions        DO Loops    Line
  1055.  
  1056.                                             698
  1057.                                             699
  1058.